1 Introduction

In this notebook we will exclude the doublets in the multiome using two approaches:

  • Applying permissive thresholds to the scrublet doublet scores (ATAC and RNA).
  • Overcluster to find and remove the clusters of B-T doublets.

2 Pre-processing

2.1 Load packages

library(Seurat)
library(Signac)
library(tidyverse)

2.2 Parameters

# Paths
path_to_obj <- here::here("multiome/results/R_objects/5.tonsil_multiome_integrated_using_wnn.rds")
path_to_save <- here::here("multiome/results/R_objects/6.tonsil_multiome_integrated_using_wnn_no_doublets.rds")


# Thresholds
max_doublet_score_rna <- 0.3
max_doublet_score_atac <- 0.3

2.3 Load data

tonsil <- readRDS(path_to_obj)

3 Cluster

DefaultAssay(tonsil) <- "RNA"
tonsil <- FindNeighbors(tonsil, reduction = "harmony_RNA", dims = 1:30)
tonsil <- FindClusters(tonsil, resolution = c(1.25, 1.5, 1.75, 2))
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 47150
## Number of edges: 1622123
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8718
## Number of communities: 27
## Elapsed time: 10 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 47150
## Number of edges: 1622123
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8595
## Number of communities: 30
## Elapsed time: 10 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 47150
## Number of edges: 1622123
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8486
## Number of communities: 31
## Elapsed time: 10 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 47150
## Number of edges: 1622123
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8381
## Number of communities: 35
## Elapsed time: 11 seconds
print(colnames(tonsil@meta.data))
##  [1] "orig.ident"                          "nCount_RNA"                          "nFeature_RNA"                        "nCount_ATAC"                         "nFeature_ATAC"                       "library_name"                        "nucleosome_signal"                   "nucleosome_percentile"               "TSS.enrichment"                      "TSS.percentile"                      "high.tss"                            "pct_mt"                              "pct_ribosomal"                       "nCount_peaks"                        "nFeature_peaks"                      "gem_id"                              "donor_id"                            "sex"                                 "age"                                 "age_group"                           "hospital"                            "assay"                               "scrublet_doublet_scores"             "scrublet_predicted_doublet"          "scrublet_doublet_scores_scaled"      "scrublet_doublet_scores_atac"        "scrublet_predicted_doublet_atac"     "scrublet_doublet_scores_scaled_atac" "RNA_snn_res.1.25"                    "RNA_snn_res.1.5"                     "RNA_snn_res.1.75"                   
## [32] "RNA_snn_res.2"                       "seurat_clusters"

3.1 Visualize clusters

vars <- str_subset(colnames(tonsil@meta.data), "^RNA_snn_res")
clusters_gg <- purrr::map(vars, function(x) {
  p <- DimPlot(
    tonsil,
    group.by = x,
    reduction = "umap.rna",
    pt.size = 0.1, label = T
  )
  p
})
clusters_gg
## [[1]]

## 
## [[2]]

## 
## [[3]]

## 
## [[4]]

4 Filter

We will exclude cluster 8 (resolution = 1.25), as we saw in the previous notebook that corresponds to B-T doublets and has a high scrublet score. We will also filter cells with extreme doublet score:

tonsil$is_doublet <- 
  tonsil$RNA_snn_res.1.25 == "8" |
  tonsil$scrublet_doublet_scores > max_doublet_score_rna |
  tonsil$scrublet_doublet_scores_atac > max_doublet_score_atac
tonsil <- subset(tonsil, subset = is_doublet == FALSE)

Plot:

p <- DimPlot(
  tonsil,
  group.by = "library_name",
  reduction = "umap.rna",
  pt.size = 0.1
)
p

5 Save

saveRDS(tonsil, path_to_save)

6 Session Information

sessionInfo()
## R version 4.0.3 (2020-10-10)
## Platform: x86_64-apple-darwin13.4.0 (64-bit)
## Running under: macOS Big Sur 10.16
## 
## Matrix products: default
## BLAS/LAPACK: /Users/pauli/opt/anaconda3/envs/Tonsil_atlas/lib/libopenblasp-r0.3.10.dylib
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] forcats_0.5.0     stringr_1.4.0     dplyr_1.0.2       purrr_0.3.4       readr_1.4.0       tidyr_1.1.2       tibble_3.0.4      ggplot2_3.3.2     tidyverse_1.3.0   Signac_1.1.0.9000 Seurat_3.9.9.9010 BiocStyle_2.16.1 
## 
## loaded via a namespace (and not attached):
##   [1] reticulate_1.18             tidyselect_1.1.0            RSQLite_2.2.1               AnnotationDbi_1.50.3        htmlwidgets_1.5.2           grid_4.0.3                  BiocParallel_1.22.0         Rtsne_0.15                  munsell_0.5.0               codetools_0.2-17            ica_1.0-2                   future_1.20.1               miniUI_0.1.1.1              withr_2.3.0                 colorspace_2.0-0            Biobase_2.48.0              OrganismDbi_1.30.0          knitr_1.30                  rstudioapi_0.12             stats4_4.0.3                ROCR_1.0-11                 tensor_1.5                  listenv_0.8.0               labeling_0.4.2              GenomeInfoDbData_1.2.3      polyclip_1.10-0             farver_2.0.3                bit64_4.0.5                 rprojroot_2.0.2             parallelly_1.21.0           vctrs_0.3.4                 generics_0.1.0              xfun_0.18                   biovizBase_1.36.0           BiocFileCache_1.12.1        lsa_0.73.2                  ggseqlogo_0.1               R6_2.5.0                    GenomeInfoDb_1.24.0         rsvd_1.0.3                  AnnotationFilter_1.12.0     bitops_1.0-6               
##  [43] spatstat.utils_1.17-0       reshape_0.8.8               DelayedArray_0.14.0         assertthat_0.2.1            promises_1.1.1              scales_1.1.1                nnet_7.3-14                 gtable_0.3.0                globals_0.13.1              goftest_1.2-2               ggbio_1.36.0                ensembldb_2.12.1            rlang_0.4.8                 RcppRoll_0.3.0              splines_4.0.3               rtracklayer_1.48.0          lazyeval_0.2.2              dichromat_2.0-0             broom_0.7.2                 checkmate_2.0.0             modelr_0.1.8                BiocManager_1.30.10         yaml_2.2.1                  reshape2_1.4.4              abind_1.4-5                 GenomicFeatures_1.40.1      backports_1.2.0             httpuv_1.5.4                Hmisc_4.4-1                 RBGL_1.64.0                 tools_4.0.3                 bookdown_0.21               ellipsis_0.3.1              RColorBrewer_1.1-2          BiocGenerics_0.34.0         ggridges_0.5.2              Rcpp_1.0.5                  plyr_1.8.6                  base64enc_0.1-3             progress_1.2.2              zlibbioc_1.34.0             RCurl_1.98-1.2             
##  [85] prettyunits_1.1.1           rpart_4.1-15                openssl_1.4.3               deldir_0.2-3                pbapply_1.4-3               cowplot_1.1.0               S4Vectors_0.26.0            zoo_1.8-8                   haven_2.3.1                 SummarizedExperiment_1.18.1 ggrepel_0.8.2               cluster_2.1.0               here_1.0.1                  fs_1.5.0                    magrittr_1.5                data.table_1.13.2           reprex_0.3.0                lmtest_0.9-38               RANN_2.6.1                  SnowballC_0.7.0             ProtGenerics_1.20.0         fitdistrplus_1.1-1          matrixStats_0.57.0          hms_0.5.3                   patchwork_1.1.0             mime_0.9                    evaluate_0.14               xtable_1.8-4                XML_3.99-0.3                jpeg_0.1-8.1                readxl_1.3.1                IRanges_2.22.1              gridExtra_2.3               compiler_4.0.3              biomaRt_2.44.4              KernSmooth_2.23-17          crayon_1.3.4                htmltools_0.5.0             mgcv_1.8-33                 later_1.1.0.1               Formula_1.2-4               lubridate_1.7.9            
## [127] DBI_1.1.0                   tweenr_1.0.1                dbplyr_1.4.4                MASS_7.3-53                 rappdirs_0.3.1              Matrix_1.2-18               cli_2.1.0                   parallel_4.0.3              igraph_1.2.6                GenomicRanges_1.40.0        pkgconfig_2.0.3             GenomicAlignments_1.24.0    foreign_0.8-80              plotly_4.9.2.1              xml2_1.3.2                  XVector_0.28.0              rvest_0.3.6                 VariantAnnotation_1.34.0    digest_0.6.27               sctransform_0.3.1           RcppAnnoy_0.0.16            graph_1.66.0                spatstat.data_1.4-3         Biostrings_2.56.0           cellranger_1.1.0            rmarkdown_2.5               leiden_0.3.5                fastmatch_1.1-0             htmlTable_2.1.0             uwot_0.1.8.9001             curl_4.3                    shiny_1.5.0                 Rsamtools_2.4.0             lifecycle_0.2.0             nlme_3.1-150                jsonlite_1.7.1              fansi_0.4.1                 viridisLite_0.3.0           askpass_1.1                 BSgenome_1.56.0             pillar_1.4.6                lattice_0.20-41            
## [169] GGally_2.0.0                fastmap_1.0.1               httr_1.4.2                  survival_3.2-7              glue_1.4.2                  spatstat_1.64-1             png_0.1-7                   bit_4.0.4                   ggforce_0.3.2               stringi_1.5.3               blob_1.2.1                  latticeExtra_0.6-29         memoise_1.1.0               irlba_2.3.3                 future.apply_1.6.0